**Second decade - sample 2**
* NOTE(review): this header previously read "First decade", but the files read
* and written by this script are the *_second_decade.dta files.
*Load data: replace whatever is in memory with the sample-1 file
use "Results\tab_B3\Scenario_3\sample1_second_decade.dta", clear


* For each grad_region code 81-85:
*   1) overwrite the two wage variables of the listed final_educ codes with the
*      values observed for the region-specific reference code
*      (1109 + region for the 9th-grade group, 1110 + region for 10th grade);
*   2) append the region code to final_educ for those observations.
* If no observation carries the reference code, the max is missing and the
* target rows are set to missing.
forvalues i = 81/85 {

	*9th grade
	* Step 1: copy wage growth and starting wage from the reference code.
	foreach v of varlist wage_growth_ambition wage_start_mean_ambition {
		tempvar ref ref_max
		gen `ref' = `v' if final_educ == 1109`i'
		* max() over the whole sample spreads the reference value to every row
		egen `ref_max' = max(`ref')
		replace `v' = `ref_max' if inlist(final_educ, 1007, 1008, 1023, 1123, 1009, 1022) & grad_region == `i'
		drop `ref' `ref_max'
	}

	* Step 2: region-specific recode of exactly the codes imputed above.
	* Fix: the first recode used to test final_educ==1107, which is not in the
	* imputed group, so code 1007 never received its region suffix.
	* NOTE(review): confirm against the education codebook that 1007 is intended.
	foreach code of numlist 1007 1008 1023 1123 1009 1022 {
		replace final_educ = `code'`i' if final_educ == `code' & grad_region == `i'
	}

	*10th grade
	foreach v of varlist wage_growth_ambition wage_start_mean_ambition {
		tempvar ref ref_max
		gen `ref' = `v' if final_educ == 1110`i'
		egen `ref_max' = max(`ref')
		replace `v' = `ref_max' if final_educ == 1010 & grad_region == `i'
		drop `ref' `ref_max'
	}

	replace final_educ = 1010`i' if final_educ == 1010 & grad_region == `i'

}

*3.g
* Give final_educ 1097 the wage growth and starting wage observed for
* final_educ 1198 (sample-wide max of the 1198 rows, missing if there are none).
foreach v of varlist wage_growth_ambition wage_start_mean_ambition {
	tempvar donor donor_max
	generate `donor' = `v' if final_educ == 1198
	egen `donor_max' = max(`donor')
	replace `v' = `donor_max' if final_educ == 1097
	drop `donor' `donor_max'
}




**kmeans**
*standardize variables
* Each wage variable is z-scored with its own sample mean and sd. The moments
* are kept in scalars the_mean_s/the_sd_s (starting wage) and
* the_mean_g/the_sd_g (wage growth); the z-scores get the suffix _s.
local raw_vars wage_start_mean_ambition wage_growth_ambition
local tags     s g
forvalues j = 1/2 {
	local v : word `j' of `raw_vars'
	local t : word `j' of `tags'

	summarize `v'
	scalar the_mean_`t' = r(mean)
	scalar the_sd_`t' = r(sd)
	generate `v'_s = (`v' - the_mean_`t') / the_sd_`t'
	* printed as a check on the standardized variable
	summarize `v'_s
}

* 4-group k-means on the two standardized variables; starting centres are
* k random observations drawn with seed 1234.
cluster kmeans wage_start_mean_ambition_s wage_growth_ambition_s, k(4) name(ambition_type_k_4_s) start(krandom(1234))

* Describe the resulting groups: sizes, mean z-scores, and cross-tab with fined.
tabulate ambition_type_k_4_s
tabstat wage_start_mean_ambition_s wage_growth_ambition_s, by(ambition_type_k_4_s)
tabulate ambition_type_k_4_s fined, row




**SAVE - CAN MERGE ON TO COUPLE SAMPLE**
* Writes the data in memory (imputed wage variables, recoded final_educ,
* standardized variables and the cluster variable ambition_type_k_4_s) to the
* sample-2 file, overwriting any existing copy.
save "Results\tab_B3\Scenario_3\sample2_second_decade.dta", replace

